# UNHCR
# Author: Connor Kelly
# Date: Nov 24, 2020

library(readxl)
library(dplyr)
library(tidyr)
library(ggplot2)
library(plotly)

Setup

I am analyzing data from UNHCR to understand the flows of asylum seekers and refugees from country to country.

First, I do some initial setup work. My main goals here are to clean the data for easier use by replacing missing values with zero, to create new variables, and to rearrange the dataset in a way that allows for easier analysis later on.

# Load the UNHCR flow workbook and build the per-origin tables used below.
# NOTE(review): setwd() with an absolute path ties the script to one machine;
# it is kept so the relative data path below resolves exactly as before.
setwd("C:/Users/Connor/Desktop/SDL")
flow <- read_excel(
  "data/UNHCR/new_ref_arrival_new_asy_app_1962_2019.xlsx",
  skip = 1
)

flow[is.na(flow)] <- 0 # Replace missing values with 0

# Start by looking at origin: sum every numeric column within each
# origin / population-type pair.
flow_origin <- flow %>%
  group_by(`Origin`, `Population type`) %>%
  summarise_if(is.numeric, sum)

# New population type "Total": the sum over all population types for a given
# country of origin. Columns are placed by name rather than by position so the
# code does not depend on how many numeric columns the workbook has.
flow_sum <- flow_origin %>%
  group_by(`Origin`) %>%
  summarise_if(is.numeric, sum) %>%
  mutate(`Population type` = "Total") %>%
  select(`Origin`, `Population type`, everything())

flow_origin <- bind_rows(flow_origin, flow_sum) # Add totals to the by-type table

# Priority Countries
# Transpose the totals so each origin country becomes a column and each
# original numeric column (named by year) becomes a row. Only the numeric
# columns are transposed, so the values stay numeric and no
# factor/character -> numeric round trip is needed.
year_cols <- setdiff(names(flow_sum), c("Origin", "Population type"))
origin <- as.data.frame(t(as.matrix(flow_sum[, year_cols])))
colnames(origin) <- flow_sum$Origin # Column names are the origin countries

# Row names carry the year labels; expose them as a leading numeric column.
origin <- cbind(year = as.numeric(rownames(origin)), origin)

Flows from country of origin

Next, I generate plots of the number of asylum seekers and refugees fleeing a given country over time. The R package “plotly” generates interactive plots which make it easier to pinpoint specific observations over time. You can hover your cursor over the graph to see the precise number of refugees and asylum seekers in a given year. You can also zoom in or highlight specific periods of interest.

# Line chart of the yearly total for one origin country.
#
# data:   data frame with a numeric `year` column and one column per origin
#         country (the `origin` table built above).
# column: name of the country column in `data` to plot.
# label:  country name to show in the plot title (defaults to `column`).
#
# Returns a ggplot object. The column is injected with `!!sym()` so the
# aesthetic is the bare column name, exactly as if it had been typed inline.
plot_origin_flow <- function(data, column, label = column) {
  ggplot(data, aes(year, !!sym(column))) +
    geom_line() +
    ggtitle(paste("Refugees and Asylum Seekers from", label)) +
    ylab("Refugees and Asylum Seekers") +
    xlab("Year")
}

# Each ggplotly() call stays at top level so the interactive plot is still
# emitted once per country.

# Afghanistan
afg_p <- plot_origin_flow(origin, "Afghanistan")
ggplotly(afg_p)
# Burundi
bdi_p <- plot_origin_flow(origin, "Burundi")
ggplotly(bdi_p)
# Chad
tcd_p <- plot_origin_flow(origin, "Chad")
ggplotly(tcd_p)
# Iraq
irq_p <- plot_origin_flow(origin, "Iraq")
ggplotly(irq_p)
# Syria
syr_p <- plot_origin_flow(origin, "Syrian Arab Rep.", "Syria")
ggplotly(syr_p)
# Libya
lby_p <- plot_origin_flow(origin, "Libya")
ggplotly(lby_p)
# Sudan
sdn_p <- plot_origin_flow(origin, "Sudan")
ggplotly(sdn_p)
# South Sudan
ssd_p <- plot_origin_flow(origin, "South Sudan")
ggplotly(ssd_p)
# Mali
mli_p <- plot_origin_flow(origin, "Mali")
ggplotly(mli_p)
# Nigeria
nga_p <- plot_origin_flow(origin, "Nigeria")
ggplotly(nga_p)
# Somalia
som_p <- plot_origin_flow(origin, "Somalia")
ggplotly(som_p)
# Guatemala
gtm_p <- plot_origin_flow(origin, "Guatemala")
ggplotly(gtm_p)
# Venezuela
ven_p <- plot_origin_flow(
  origin, "Venezuela (Bolivarian Republic of)", "Venezuela"
)
ggplotly(ven_p)

Destinations of asylum seekers

Next, I want to see where exactly asylum seekers from these priority countries are going. For each country of interest, I disaggregate my data based on country of asylum (i.e. to where have asylum seekers fled). I then plot the number of asylum seekers in each country of asylum over time. This makes for a busy plot, but the Plotly package makes this more manageable. If you wish to toggle whether a certain country is shown on the plot, you can click on the country name in the legend. If you wish to isolate the graph to show only one country, double-click on that country in the legend.

# Flow Destination (Country of asylum)

# Keep observations with origins in priority countries
# Restrict the raw flows to rows whose origin is one of the priority
# countries, then swap the first two columns (the rest keep their order).
flow_dest <- flow %>%
  filter(
    is.element(
      Origin,
      c(
        "Afghanistan", "Burundi", "Chad", "Iraq", "Syrian Arab Rep.",
        "Libya", "Sudan", "South Sudan", "Mali", "Nigeria", "Somalia",
        "Guatemala", "Venezuela (Bolivarian Republic of)"
      )
    )
  ) %>%
  select(2, 1, 3:61)

# The same recipe is applied to every priority country, so it lives in two
# small helpers instead of being repeated country by country.

# Long-format asylum-seeker counts for one country of origin.
#
# flows:       wide table with `Origin`, `Population type`,
#              `Country of asylum` and one column per year.
# origin_name: value of `Origin` to keep.
#
# Returns a table with one row per country of asylum and year, with columns
# `Country of asylum` (factor), `year` (numeric) and `asylumseekers`
# (numeric).
asylum_destinations <- function(flows, origin_name) {
  dest <- flows %>%
    filter(`Origin` == origin_name, `Population type` == "Asylum-seekers")
  dest <- dest[, !(names(dest) %in% c("Origin", "Population type"))]

  # Make wide data long for easier plotting. Every column except the country
  # of asylum is pivoted, so the number of year columns is not hard-coded.
  dest <- pivot_longer(
    dest,
    cols = -`Country of asylum`,
    names_to = "year",
    values_to = "asylumseekers"
  )

  dest$`Country of asylum` <- as.factor(dest$`Country of asylum`)
  dest$year <- as.numeric(dest$year)
  dest$asylumseekers <- as.numeric(dest$asylumseekers)
  dest
}

# One line per country of asylum, coloured by country.
#
# destinations: output of asylum_destinations().
# label:        origin country name to show in the plot title.
#
# Returns a ggplot object.
plot_destinations <- function(destinations, label) {
  ggplot(destinations, aes(year, asylumseekers)) +
    geom_line(aes(colour = factor(`Country of asylum`))) +
    ggtitle(paste("Destinations of Asylum Seekers from", label)) +
    ylab("Asylum Seekers") +
    xlab("Year")
}

# Each ggplotly() call stays at top level so the interactive plot is still
# emitted once per country.

# Afghanistan
afg <- asylum_destinations(flow_dest, "Afghanistan")
afg_p <- plot_destinations(afg, "Afghanistan")
ggplotly(afg_p)
# Burundi
bdi <- asylum_destinations(flow_dest, "Burundi")
bdi_p <- plot_destinations(bdi, "Burundi")
ggplotly(bdi_p)
# Chad
tcd <- asylum_destinations(flow_dest, "Chad")
tcd_p <- plot_destinations(tcd, "Chad")
ggplotly(tcd_p)
# Iraq
irq <- asylum_destinations(flow_dest, "Iraq")
irq_p <- plot_destinations(irq, "Iraq")
ggplotly(irq_p)
# Syria
syr <- asylum_destinations(flow_dest, "Syrian Arab Rep.")
syr_p <- plot_destinations(syr, "Syria")
ggplotly(syr_p)
# Libya
lby <- asylum_destinations(flow_dest, "Libya")
lby_p <- plot_destinations(lby, "Libya")
ggplotly(lby_p)
# Sudan
sdn <- asylum_destinations(flow_dest, "Sudan")
sdn_p <- plot_destinations(sdn, "Sudan")
ggplotly(sdn_p)
# South Sudan
ssd <- asylum_destinations(flow_dest, "South Sudan")
ssd_p <- plot_destinations(ssd, "South Sudan")
ggplotly(ssd_p)
# Mali
mli <- asylum_destinations(flow_dest, "Mali")
mli_p <- plot_destinations(mli, "Mali")
ggplotly(mli_p)
# Nigeria
nga <- asylum_destinations(flow_dest, "Nigeria")
nga_p <- plot_destinations(nga, "Nigeria")
ggplotly(nga_p)
# Somalia
som <- asylum_destinations(flow_dest, "Somalia")
som_p <- plot_destinations(som, "Somalia")
ggplotly(som_p)
# Guatemala
gtm <- asylum_destinations(flow_dest, "Guatemala")
gtm_p <- plot_destinations(gtm, "Guatemala")
ggplotly(gtm_p)
# Venezuela
ven <- asylum_destinations(flow_dest, "Venezuela (Bolivarian Republic of)")
ven_p <- plot_destinations(ven, "Venezuela")
ggplotly(ven_p)